clear all
set more off

use "$inputs/prowess/Productwise Energy Consumption.dta", clear
gen product_name5 = upper(product4)
gen idm = _n
reclink2 product_name5 using "$inputs/handcoded/droplist.dta", idm(idm) idu(idu) gen(score) manytoone
drop if score >= 0.8 & !missing(score)
drop idm idu Uproduct_name5 _merge score
save "$working/energy_temp2.dta", replace
 
use "$working/energy_temp2.dta", clear
split prodwise_energy_cons_unit11, parse(/)

rename prodwise_energy_cons_unit112 unit1
replace unit1=upper(unit1)
merge m:1 unit1 using "$inputs/handcoded/unit_conversion.dta"
keep if _merge ==3
drop _merge unit1
rename unit product_unit
rename conversion product_conversion

rename prodwise_energy_cons_unit111 unit1
replace unit1=upper(unit1)
merge m:1 unit1 using "$inputs/handcoded/unit_conversion.dta"
keep if _merge ==3
drop _merge unit1
rename unit Unit
rename conversion energy_conversion

rename energy_cons_name7 energy_cons_name
merge m:1 energy_cons_name Unit using "$inputs/handcoded/energy_content.dta"
keep if _merge == 3
drop _merge
rename Unit energy_unit
* we can improve the information on energy_content.dta to get more observations
gen energy_intensity = (prodwise_energy_cons_qty * energy_content_MJ) * (energy_conversion / product_conversion)

collapse (sum) energy_intensity, by(prodener_cocode1 prodener_date3 product4 product_unit)

* drop 0 intensity duplicates 
duplicates tag prodener_cocode1 prodener_date3 product4, g(tag)
drop if tag==1 & energy_intensity==0
drop tag

* drop if firm reports multiple units for one of its products
duplicates tag prodener_cocode1 prodener_date3 product4, g(tag)
bys prodener_cocode1 prodener_date3: egen duplicated_units = total(tag)
keep if duplicated_units == 0
drop duplicated_units tag


rename prodener_cocode1 co_code
joinby co_code product4 using "$working/reclink_short_.dta"
drop product4
sort co_code prodener_date3 product_name5 product_unit
rename co_code products_cocode1 
rename prodener_date3 prod_date3

duplicates tag products_cocode1  prod_date3 product_name5 product_unit, g(tag)
bys products_cocode1  prod_date3: egen duplicated_products = total(tag)
keep if duplicated_products == 0
drop tag duplicated_products
merge 1:1 products_cocode1 prod_date3 product_name5 product_unit using "$working/ppt_clean.dta"
drop if _merge == 1
drop _merge
* generate energy revenue share
egen sales_firm = total(sales_value32), by(products_cocode1 prod_date3)
gen sales_share = sales_value32/sales_firm
egen sales_share_with_energy = total(sales_share) if !missing(energy_intensity), by(products_cocode1 prod_date3)
bys products_cocode1 (prod_date3 energy_intensity): carryforward sales_share_with_energy, gen(energy_revenue_share) dynamic_condition(prod_date3[_n-1] == prod_date3)
replace energy_revenue_share = 0 if missing(energy_revenue_share)
drop sales_firm sales_share sales_share_with_energy
order products_cocode1  prod_date3 ann_rep_months4 product_name5 product_unit sales_unit

save "$working/product_energy_cleaned.dta", replace






